## The purpose of this file is to run the models for the python downloads project
##
## this assumes you have already run prepAndClean.R
## set your own paths here:
#paperPath = '/home/kaylea/Dropbox/Apps/Overleaf/Python Package Download Trends/'
#dataPath = './data/'

#paperPath = '/Users/mercedesross/Dropbox/Apps/Overleaf/Python Package Download Trends/'
## Root directory for paper artifacts. './' keeps the output local, which is
## how the script is run on hyak.
paperPath <- "./"
## Directory prefix (with trailing slash) for the prepared modeling data.
dataPath <- "./data/"

library(nlme)
library(dplyr)
library(texreg)

## Helper library for the knitr-R-Overleaf integration.
source("lib-00-utils.R")

## Pull the prepared modeling data into the workspace.
## NOTE(review): presumably this is where vDF and fullDF come from -- confirm
## against prepAndClean.R.
load(file = paste0(dataPath, "readyToModel.RData"))

#load(paste('readyToModel.RData'))


### Build "months after the event" counters: every negative (pre-event) value
### is clamped to 0 so the models can capture the discontinuity, and the
### result is rounded to a whole number.

vDF <- vDF %>%
  mutate(months.post.v = round(pmax(monthsSinceVuln, 0)))

fullDF <- fullDF %>%
  mutate(months.post.ai = round(pmax(monthsSinceGPT, 0)))

### models go here

# ICC baseline ----
# Vulnerability data set: unconditional means model (intercept only) with a
# random intercept per package, fit by maximum likelihood.
model_1_vul <- lme(
  fixed = downloads ~ 1,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_1_vul)

## Capture the texreg output for the unconditional means model in the
## character vector m1.vuln.tex (remembered at the end of the file).
## split = TRUE also echoes the table to the console. texreg() has to stay a
## top-level expression so its printed output reaches the sink.
## No custom.coef.names are supplied for this table.
con <- textConnection("m1.vuln.tex", open = "w") # remembered
sink(file = con, type = "output", split = TRUE)
texreg(
  model_1_vul,
  custom.model.names = "Unconditional Means Model", # shown at top of table
  omit.coef = "factor",
  stars = NULL,
  digits = 2,
  ci.force = TRUE,
  table = FALSE,
  use.packages = FALSE
)
sink()
close(con)
rm(con)



# Vulnerability data set: intercept + monthOfLife (no is_AI term here),
# random intercept per package, fit by maximum likelihood.
model_2_vul <- lme(
  fixed = downloads ~ 1 + monthOfLife,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_2_vul)


# Vulnerability data set: adds is_AI alongside monthOfLife.
model_3_vul <- lme(
  fixed = downloads ~ 1 + is_AI + monthOfLife,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_3_vul)


# Vulnerability data set: is_AI + monthOfLife + months after the vulnerability.
# The clamped counter months.post.v is used here; an earlier version of this
# model used the raw monthsSinceVuln instead.
model_4_vul <- lme(
  fixed = downloads ~ 1 + is_AI + monthOfLife + months.post.v,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_4_vul)


# NOTE(review): model_5_vul has exactly the same specification as model_4_vul
# (same fixed effects, random effects, data and method) -- confirm whether a
# different term was intended here.
model_5_vul <- lme(
  fixed = downloads ~ 1 + is_AI + monthOfLife + months.post.v,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_5_vul)

# Variant of model_5_vul in which months.post.v enters as a factor, i.e. one
# coefficient per distinct post-vulnerability month value.
model_5_vul.whatif <- lme(
  fixed = downloads ~ 1 + is_AI + monthOfLife + as.factor(months.post.v),
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_5_vul.whatif)

# Vulnerability model without the is_AI term: monthOfLife + months.post.v.
# This is the model rendered into mfinal.vuln.tex below.
model_full_vulNOAI <- lme(
  fixed = downloads ~ 1 + monthOfLife + months.post.v,
  data = vDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_full_vulNOAI)

## Capture the texreg output for the interrupted-trajectory model in the
## character vector mfinal.vuln.tex (remembered at the end of the file).
## texreg() has to stay a top-level expression so its printed output reaches
## the sink; split = TRUE also echoes it to the console.
con <- textConnection("mfinal.vuln.tex", open = "w") # remembered
sink(file = con, type = "output", split = TRUE)
texreg(
  model_full_vulNOAI,
  custom.model.names = "Trajectory Interrupted by Vulnerability Model", # shown at top of table
  custom.coef.names = c(
    "(Intercept)",
    "Months Old",
    "Months After Vulnerability"
  ),
  omit.coef = "factor",
  stars = NULL,
  digits = 2,
  include.variance = TRUE,
  ci.force = TRUE,
  table = FALSE,
  use.packages = FALSE
)
sink()
close(con)
rm(con)

## Capture the texreg output for the UNinterrupted trajectory model in the
## character vector m2.vuln.tex (remembered at the end of the file).
##
## The uninterrupted trajectory is model_2_vul (downloads ~ 1 + monthOfLife).
## This table was previously rendered from model_full_vulNOAI, which contains
## months.post.v and is therefore the *interrupted* model already exported as
## mfinal.vuln.tex.
##
## texreg() has to stay a top-level expression so its printed output reaches
## the sink; split = TRUE also echoes it to the console.
con <- textConnection("m2.vuln.tex", "w") #remembered
sink(con, split=TRUE, type="output")
texreg(model_2_vul, omit.coef = 'factor', stars=NULL, digits=2,
       custom.model.names=c('Trajectory UNinterrupted by Vulnerability Model'), #name here will appear at top of table
       custom.coef.names=c('(Intercept)', 'Months Old'), # same labels as the mfinal.vuln.tex table
       use.packages=FALSE, table=FALSE, ci.force = TRUE)
sink()
close(con);rm(con)


## BIC table for the vulnerability models that is remembered for the paper.
## NOTE(review): model_3_vul and model_4_vul are not part of this table even
## though they appear in the console comparison below -- confirm that the
## omission is intentional.
compareVModels <- as.data.frame(
  BIC(model_1_vul, model_2_vul, model_5_vul, model_5_vul.whatif, model_full_vulNOAI)
) # remembered

## Console-only comparisons.
BIC(
  model_1_vul, model_2_vul, model_3_vul, model_4_vul, model_5_vul,
  model_5_vul.whatif, model_full_vulNOAI
)
AIC(model_1_vul, model_2_vul, model_3_vul, model_4_vul, model_5_vul)

# Looks like AIC and BIC prefer model 2.
# Model 6 (the random slope model) does not work on my laptop, even with REML.
#-----------------------------------------------------------


# Full data set ----
# Unconditional means model (intercept only) with a random intercept per
# package, fit by maximum likelihood.
model_1_full <- lme(
  fixed = downloads ~ 1,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_1_full)

# Full data set: adds monthOfLife.
model_2_full <- lme(
  fixed = downloads ~ 1 + monthOfLife,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_2_full)

# Full data set: monthOfLife + is_AI.
model_3_full <- lme(
  fixed = downloads ~ 1 + monthOfLife + is_AI,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_3_full)


# Full data set: monthOfLife + is_AI + months after ChatGPT.
# The clamped counter months.post.ai is used here; an earlier version of this
# model used the raw monthsSinceGPT instead.
model_4_full <- lme(
  fixed = downloads ~ 1 + monthOfLife + is_AI + months.post.ai,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_4_full)


# Full data set: monthOfLife plus months.post.ai, is_AI and their interaction.
# The clamped counter months.post.ai is used here; an earlier version of this
# model used the raw monthsSinceGPT instead.
# This is the model rendered into mfinal.ai.tex below.
model_5_full <- lme(
  fixed = downloads ~ 1 + monthOfLife + months.post.ai * is_AI,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_5_full)

# Exploratory variant: full three-way crossing of months.post.ai, is_AI and
# monthOfLife (all main effects and interactions).
model_5_full.playing <- lme(
  fixed = downloads ~ 1 + months.post.ai * is_AI * monthOfLife,
  data = fullDF,
  random = ~ 1 | package,
  method = "ML"
)

summary(model_5_full.playing)

## Capture the texreg output for model_5_full in the character vector
## mfinal.ai.tex (remembered at the end of the file).
## The coefficient labels follow the term order of the model formula:
## intercept, monthOfLife, months.post.ai, is_AI, then the interaction.
## texreg() has to stay a top-level expression so its printed output reaches
## the sink; split = TRUE also echoes it to the console.
con <- textConnection("mfinal.ai.tex", open = "w") # remembered
sink(file = con, type = "output", split = TRUE)
texreg(
  model_5_full,
  custom.model.names = "Trajectory of AI and non-AI packages, Interrupted by ChatGPT", # shown at top of table
  custom.coef.names = c(
    "(Intercept)",
    "Months Old",
    "Months After ChatGPT Released",
    "Package is AI-related",
    "Months After ChatGPT Released:Package is AI-related"
  ),
  omit.coef = "factor",
  stars = NULL,
  digits = 2,
  include.variance = TRUE,
  ci.force = TRUE,
  table = FALSE,
  use.packages = FALSE
)
sink()
close(con)
rm(con)

## Console-only comparisons across the full-data models.
BIC(
  model_1_full, model_2_full, model_3_full, model_4_full, model_5_full,
  model_5_full.playing
)
AIC(model_1_full, model_2_full, model_3_full, model_4_full, model_5_full)

## BIC table for models 1-5 that is remembered for the paper.
compareAIModels <- as.data.frame(
  BIC(model_1_full, model_2_full, model_3_full, model_4_full, model_5_full)
) # remembered

# Looks like BIC and AIC prefer model 5.
# Model 6 (the random slope model) does not work on my laptop, even with REML.

#-------------------------------------------------------
## Anything we want to use in the paper gets saved via a call to remember().
##
## NOTE(review): `nosave` and `remember()` are not defined in this file;
## presumably both come from lib-00-utils.R, with remember() adding its
## argument to the list `r` -- confirm there.
##
## The collected list is written to <paperPath>knitr_rdata/knitr_data.RData.
if (!nosave) {
  r <- list()
  remember(m1.vuln.tex)
  remember(m2.vuln.tex)
  remember(mfinal.vuln.tex)
  remember(mfinal.ai.tex)
  remember(compareAIModels)
  remember(compareVModels)
  ## save() cannot create missing directories, so make sure knitr_rdata/
  ## exists first (e.g. when paperPath is './' on a fresh machine). Only this
  ## one level is created (recursive = FALSE): a mistyped paperPath still makes
  ## save() fail instead of silently creating a new directory tree.
  dir.create(paste0(paperPath, "knitr_rdata"), showWarnings = FALSE, recursive = FALSE)
  save(r, file=paste0(paperPath, "knitr_rdata/knitr_data.RData"))
  rm(r)
  print('remembrances done!')
}

## Snapshot the whole workspace (all fitted models included) for later inspection.
save.image(file = "mysession.RData")
